This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# setwd('~/./Writing programs')  # optional: switch to the folder with the data
# Read wnv.csv into the data frame `wnv`.
wnv <- read.csv(file = "wnv.csv")
# head(wnv)  # optional: preview the first 6 rows of the dataset
# Attach the plotting packages used by the charts in this document.
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.4.4
library("plotly")
## Warning: package 'plotly' was built under R version 3.4.4
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Histogram of the Year column, bars filled by State. The x axis shows Year,
# so it is labelled "Year"; the y axis is the number of rows in each bin.
ggplot(data = wnv) +
  geom_histogram(aes(x = Year, fill = State)) +
  labs(
    x = "Year", y = "Number of records",
    title = "WNV infection frequency in the USA",
    caption = "Data from: the interweb"
  ) # the output is not informative; facet by year instead
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Render the most recent ggplot as an interactive plotly widget.
ggplotly(p = ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of the Total column, bars filled by State.
# coord_cartesian() zooms the y axis to 0-50 without discarding anything;
# setting limits on scale_y_continuous() removes every bar segment that
# reaches past 50, which silently distorts the stacked bars.
ggplot(data = wnv, mapping = aes(x = Total)) +
  geom_histogram(mapping = aes(fill = State)) +
  coord_cartesian(ylim = c(0, 50)) +
  labs(
    x = "Total cases", y = "Number of records",
    title = "WNV infection frequency in the USA, a state analysis",
    caption = "Data from the interweb"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Render the most recent ggplot as an interactive plotly widget.
ggplotly(p = ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of the Total column on a log10 x axis, bars filled by State.
# scale_x_log10() has to be joined to the plot with `+`; written as a separate
# statement it only prints an unused scale object and the x axis stays linear.
# coord_cartesian() zooms the y axis to 0-50 without discarding bar segments.
ggplot(data = wnv, mapping = aes(x = Total)) +
  geom_histogram(mapping = aes(fill = State)) +
  scale_x_log10() +
  coord_cartesian(ylim = c(0, 50)) +
  labs(
    x = "Total cases (log10 scale)", y = "Number of records",
    title = "WNV infection frequency in the USA, a state analysis",
    caption = "Data from the interweb"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Render the most recent ggplot as an interactive plotly widget.
ggplotly(p = ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# FACETING HISTOGRAM WITH AXIS LIMITS
# Histogram of the Total column, bars filled by State, one panel per Year.
# The x axis shows Total and the y axis the number of rows per bin, so the
# labels say so.
ggplot(data = wnv, mapping = aes(x = Total)) +
  geom_histogram(mapping = aes(fill = State)) +
  facet_wrap(~ Year) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    x = "Total cases", y = "Number of records",
    title = "WNV infection frequency in the USA, a state analysis",
    caption = "Data from the interweb"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Render the most recent ggplot as an interactive plotly widget.
ggplotly(p = ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#### Calculate case fatality rate ####
# Case fatality rate (cfr) = Fatal / Total for each row. The result is stored
# as a new `cfr` column on the in-memory wnv data frame.
wnv[["cfr"]] <- with(wnv, Fatal / Total)
head(x = wnv) # show the first 6 rows, now including cfr
## State Year EncephMen Fever Other Total Fatal Latitude Longitude
## 1 New York 1999 59 3 0 62 7 42.54 -75.28
## 2 Connecticut 2000 0 1 0 1 0 41.51 -72.76
## 3 New Jersey 2000 5 1 0 6 1 40.17 -74.72
## 4 New York 2000 14 0 0 14 1 42.54 -75.28
## 5 Alabama 2001 2 0 0 2 1 32.28 -86.92
## 6 Connecticut 2001 6 0 0 6 1 41.51 -72.76
## cfr
## 1 0.11290323
## 2 0.00000000
## 3 0.16666667
## 4 0.07142857
## 5 0.50000000
## 6 0.16666667
# Histogram of the case fatality rate (cfr), bars filled by State.
# x is mapped to cfr once in ggplot(), instead of mapping Total globally and
# overriding it inside the layer, so the code states what is actually drawn.
ggplot(data = wnv, mapping = aes(x = cfr)) +
  geom_histogram(mapping = aes(fill = State)) +
  scale_y_continuous(limits = c(0, 100)) +
  labs(
    x = "Case fatality rate", y = "Number of records",
    title = "WNV case fatality rate in the USA, a state analysis",
    caption = "Data from the interweb"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Render the most recent ggplot as an interactive plotly widget.
ggplotly(p = ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# FACET THE CASE FATALITY RATE HISTOGRAM
# Y-axis maximum lowered to 25, as 100 was too much when first plotted.
# x is mapped to cfr once in ggplot(), instead of mapping Total globally and
# overriding it inside the layer.
ggplot(data = wnv, mapping = aes(x = cfr)) +
  geom_histogram(mapping = aes(fill = State)) +
  facet_wrap(~ Year) +
  scale_y_continuous(limits = c(0, 25)) +
  labs(
    x = "Case fatality rate", y = "Number of records",
    title = "WNV case fatality rate in the USA, a state analysis",
    caption = "Data from the interweb"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Render the most recent ggplot as an interactive plotly widget.
ggplotly(p = ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# CHANGE YEAR FROM NUMERICAL TO FACTOR
# As a factor, Year is treated as a discrete variable for the fill colours.
wnv$Year <- as.factor(wnv$Year)
# GRAPH TOTAL NUMBERS BY STATE AND ROTATE X AXIS TITLES (total numbers, not cfr)
# geom_col() draws bars whose height is the y value itself. It replaces
# geom_histogram(stat = 'identity'), which warned about ignored binning
# parameters (binwidth, bins, pad).
ggplot(data = wnv, mapping = aes(x = State, y = Total, fill = Year)) +
  geom_col() +
  labs(
    x = "State", y = "Total",
    title = "WNV infection frequency in the USA by state",
    caption = "Data from the interweb"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0))
# Render the most recent ggplot as an interactive plotly widget.
ggplotly(p = ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#remove negative sign to swap around text - test & rerun code to see!
# GRAPH CASE FATALITY RATIO (CFR) BY STATE AND ROTATE X AXIS TITLES
# geom_col() draws bars whose height is the y value itself. It replaces
# geom_histogram(stat = 'identity'), which warned about ignored binning
# parameters (binwidth, bins, pad). The y axis shows cfr, so it is labelled
# as such rather than 'Total'.
ggplot(data = wnv, mapping = aes(x = State, y = cfr, fill = Year)) +
  geom_col() +
  labs(
    x = "State", y = "Case fatality ratio",
    title = "WNV case fatality ratio in the USA by state",
    caption = "Data from the interweb"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0))
# Render the most recent ggplot as an interactive plotly widget.
ggplotly(p = ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
#### calculate mean and sd ####
#Write a function to calculate the mean and standard error (standard deviation
#divided by the square root of the sample size) of the neuroinvasive disease rate for all the
#states in a given list and given set of years. Follow the Google R style and remember to place
#the function near the top of your script. Use your function to calculate the average severe
#disease rate in California, Colorado, and New York
# Copy the EncephMen column of wnv into a standalone vector.
EncephMen <- c(wnv[["EncephMen"]])
# Computes the arithmetic mean of a numeric vector.
#
# This definition masks base::mean() for the rest of the session, so it accepts
# the `na.rm` argument that existing mean(x, na.rm = TRUE) calls rely on.
#
# Args:
#   x: Numeric vector whose mean is to be calculated.
#   na.rm: If TRUE, missing values are removed before averaging. The default
#          FALSE lets an NA in x propagate to the result.
#
# Returns:
#   The sum of x divided by its length; NaN for a zero-length x.
mean <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sum(x) / length(x)
}
# Mean and standard deviation of the EncephMen values taken from wnv.
mean(x = EncephMen)
## [1] 40.68382
sd(x = EncephMen)
## [1] 79.81267
# Computes the standard error of the mean of a numeric vector.
#
# Args:
#   x: Numeric vector of observations.
#
# Returns:
#   The square root of (sample variance of x divided by the length of x).
standard.error <- function(x) {
  n_obs <- length(x)
  sample_variance <- var(x)
  sqrt(sample_variance / n_obs)
}
# Standard error of the EncephMen values taken from wnv.
standard.error(x = EncephMen)
## [1] 4.839354
# NOW CALCULATE FOR CALIFORNIA, COLORADO, AND NEW YORK ONLY
# NEED TO WORK WITH SUBSET OF DATA
# Each subset holds the EncephMen values of the rows for one state. The result
# of subset() is assigned directly: wrapping the call as `(data = subset(...))`
# would also create a stray global variable named `data`.
EncephMenCaliforniaSubset <- subset(wnv$EncephMen, wnv$State == "California")
mean(EncephMenCaliforniaSubset)
## [1] 137.6667
sd(EncephMenCaliforniaSubset)
## [1] 133.7844
standard.error(EncephMenCaliforniaSubset)
## [1] 54.61725
EncephMenColoradoSubset <- subset(wnv$EncephMen, wnv$State == "Colorado")
mean(EncephMenColoradoSubset)
## [1] 142.3333
sd(EncephMenColoradoSubset)
## [1] 236.8136
standard.error(EncephMenColoradoSubset)
## [1] 96.67873
EncephMenNewYorkSubset <- subset(wnv$EncephMen, wnv$State == "New York")
mean(EncephMenNewYorkSubset)
## [1] 31.11111
sd(EncephMenNewYorkSubset)
## [1] 23.63496
standard.error(EncephMenNewYorkSubset)
## [1] 7.878318
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.